...taking it to the Macs!

home *** CD-ROM | disk | FTP | other *** search

/ ...taking it to the Macs! / ...taking it to the Macs!.iso / Extras / ActiveX Mac SDK / ActiveX SDK / Container Common / htparse.c < prev next >

Wrap

Text File | 1997-01-03 | 11KB | 483 lines

/*
   This file was derived from the libwww code, version 2.15, from CERN.
   A number of modifications have been made by Spyglass.

   eric@spyglass.com
 */

/*      Parse HyperText Document Address            HTParse.c
**      ================================
*/

#include "all.h"    //jjo
#include <string.h> /* strlen, strchr, strcmp, strcat, memcpy, memmove, memset */

#ifdef __cplusplus
extern "C" {
#endif
char *x_ExpandRelativeAnchor(const char *rel, const char *base);
#ifdef __cplusplus
}
#endif

#define HEX_ESCAPE '%'

/* The pieces of a document address, as produced by scan().  Every non-zero
** member points INTO the buffer that was handed to scan().
*/
struct struct_parts
{
    char *access;
    char *host;
    char *absolute;
    char *relative;
/*  char * search;      no - treated as part of path */
    char *anchor;
};


/*  Strip white space off a string
**  ------------------------------
**
** On entry,
**  s       may be NULL, in which case NULL is returned.
** On exit,
**  Return value points to first non-white character, or to the
**  terminating zero if there is none.
**  All trailing white space is OVERWRITTEN with zero.
*/
PUBLIC char *HTStrip(char *s)
{
#define SPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
    size_t len;

    if (!s)
        return NULL;            /* Doesn't dump core if NULL */

    /* Zap trailing blanks.  Indexing by length keeps us from ever forming
       a pointer in front of the string, which the old pointer loop did. */
    len = strlen(s);
    while (len > 0 && SPACE(s[len - 1]))
        s[--len] = 0;

    while (SPACE(*s))
        s++;                    /* Strip leading blanks */

    return s;
}


/*  Scan a filename for its constituents
**  ------------------------------------
**
** On entry,
**  name    points to a document name which may be incomplete.
**          The buffer is MODIFIED: separators are overwritten with zero.
** On exit,
**  absolute or relative may be nonzero (but not both).
**  host, anchor and access may be nonzero if they were specified.
**  Any which are nonzero point to zero terminated strings inside name.
**
** Macintosh specific: for the "file" access method, if what appears to
** be the host is the name of a mounted volume, it is treated as the first
** component of the path and the host becomes the empty string.
*/
PRIVATE void scan(char *name, struct struct_parts *parts)
{
    char *after_access;
    char *p;
    size_t length = strlen(name);   /* taken before any separator is zapped */
    size_t i;

    parts->access = 0;
    parts->host = 0;
    parts->absolute = 0;
    parts->relative = 0;
    parts->anchor = 0;

    /* Everything up to a ':' that comes before the first '/' or '#' is the
       access method. */
    after_access = name;
    for (p = name; *p; p++)
    {
        if (*p == ':')
        {
            *p = 0;
            parts->access = name;   /* Access name has been specified */
            after_access = p + 1;
        }
        if (*p == '/')
            break;
        if (*p == '#')
            break;
    }

    /* Walk backwards zapping every '#'; the anchor therefore ends up being
       the text between the first '#' and the second one (or the end). */
    for (i = length; i-- > 0;)
    {
        if (name[i] == '#')
        {
            parts->anchor = name + i + 1;
            name[i] = 0;            /* terminate the rest */
        }
    }

    p = after_access;
    if (*p == '/')
    {
        if (p[1] == '/')
        {
            parts->host = p + 2;    /* host has been specified */
            *p = 0;                 /* Terminate access */
            p = strchr(parts->host, '/');   /* look for end of host name if any */

            /* If this is a "file" access, what appears to be a host may
               really be a volume.  access is NULL for scheme-less input
               such as "//host/path", so it must be tested first. */
            if (parts->access && !strcmp(parts->access, "file"))
            {
                size_t host_len = p ? (size_t) (p - parts->host)
                                    : strlen(parts->host);

                /* A Pascal string carries at most 255 characters and we
                   need room for the trailing ':'; anything longer cannot
                   be a volume name and is left alone (the old code copied
                   it into the 256 byte buffer regardless). */
                if (host_len > 0 && host_len < 255)
                {
                    unsigned char vBuffer[256];
                    HParamBlockRec vParamBlock;

                    /* make a proper mac style version of the "host" name:
                       length byte, the name, then ':' */
                    vBuffer[0] = (unsigned char) (host_len + 1);
                    memcpy(vBuffer + 1, parts->host, host_len);
                    vBuffer[host_len + 1] = ':';

                    /* check to see if the "host" name matches that of a
                       mounted volume */
                    memset(&vParamBlock, 0, sizeof(vParamBlock));
                    vParamBlock.volumeParam.ioNamePtr = vBuffer;
                    vParamBlock.volumeParam.ioVolIndex = -1;
                    if (!PBHGetVInfoSync(&vParamBlock))
                    {
                        /* it did; we will assume the "host" name is a
                           mistake.  Pointing at the second slash makes the
                           host empty and the volume name the first path
                           component once that slash is zapped below. */
                        parts->host = after_access + 1;
                        p = parts->host;
                    }
                }
            }

            if (p)
            {
                *p = 0;                     /* Terminate host */
                parts->absolute = p + 1;    /* Root has been found */
            }
        }
        else
        {
            parts->absolute = p + 1;        /* Root found but no host */
        }
    }
    else
    {
        parts->relative = (*after_access) ? after_access : 0;  /* zero for "" */
    }
}                               /*scan */


/*  Parse a Name relative to another name
**  -------------------------------------
**
**  This returns those parts of a name which are given (and requested)
**  substituting bits from the related name where necessary.
**
** On entry,
**  aName       A filename given (NULL is treated as "")
**  relatedName A name relative to which aName is to be parsed
**              (NULL is treated as "")
**  wanted      A mask for the bits which are wanted (PARSE_xxx).
**
** On exit,
**  returns     The result of GTR_strdup(): a newly allocated string which
**              MUST BE FREED by the caller, at most MAX_URL_STRING
**              characters long.
*/
char *HTParse(const char *aName, const char *relatedName, int wanted)
{
    char *p;
    char *access;
    struct struct_parts given, related;
    /* One byte more than rel: a '/' may be appended below.
       NOTE(review): assumes GTR_strncpy(dst, src, n) stores at most n
       characters plus the terminator, as the original sizing implies. */
    char name[MAX_URL_STRING + 2];
    char rel[MAX_URL_STRING + 1];
    char result[2 * MAX_URL_STRING + 1];    /* Make this longer to avoid overflow */

    if (!aName)
        aName = "";
    if (!relatedName)
        relatedName = "";

    /* Make working copies of input strings to cut up: */
    GTR_strncpy(name, aName, MAX_URL_STRING);
    GTR_strncpy(rel, relatedName, MAX_URL_STRING);

    scan(name, &given);
    scan(rel, &related);

    /*
       For the given part, if we get a URL which contains a protocol and
       a host, but not an absolute, then it looked something like this:

       http://www.spyglass.com:4040

       We need to assume that the slash at the end should be there, or
       when this is found as a hyperlink in a document, it will steal
       the absolute part from the URL of the document itself (related).
     */
    if (given.access && given.host && !given.absolute)
    {
        GTR_strncpy(name, aName, MAX_URL_STRING);
        strcat(name, "/");
        scan(name, &given);
    }

    result[0] = 0;              /* Clear string */

    access = given.access ? given.access : related.access;
    if ((wanted & PARSE_ACCESS) && access)
    {
        strcat(result, access);
        if (wanted & PARSE_PUNCTUATION)
            strcat(result, ":");
    }

    /* If the access methods differ, inherit nothing. */
    if (given.access && related.access
        && strcmp(given.access, related.access) != 0)
    {
        related.host = 0;
        related.absolute = 0;
        related.relative = 0;
        related.anchor = 0;
    }

    if ((wanted & PARSE_HOST) && (given.host || related.host))
    {
        char *tail = result + strlen(result);
        char *host_end;

        if (wanted & PARSE_PUNCTUATION)
            strcat(result, "//");
        strcat(result, given.host ? given.host : related.host);

        /* Ignore default port numbers, and trailing dots on FQDNs
           which will only cause identical addresses to look different */
        host_end = strchr(tail, ':');
        if (host_end && access)
        {                       /* Port specified */
            if ((strcmp(access, "http") == 0 && strcmp(host_end, ":80") == 0)
                || (strcmp(access, "gopher") == 0 && strcmp(host_end, ":70") == 0)
#ifdef SHTTP_ACCESS_TYPE
                || (strcmp(access, "shttp") == 0 && strcmp(host_end, ":80") == 0)
#endif
                )
                *host_end = (char) 0;   /* It is the default: ignore it */
        }
        if (!host_end)
            host_end = tail + strlen(tail);     /* After hostname */

        /* host_end is now either the ':' of a port we keep or the end of
           the string.  Drop a final '.' of the host name, sliding any kept
           port down over it.  (The old test only fired when a port was
           present, and then cut the port off together with the dot.) */
        if (host_end > tail && host_end[-1] == '.')
            memmove(host_end - 1, host_end, strlen(host_end) + 1);
    }

    /* If different hosts, inherit no path. */
    if (given.host && related.host
        && strcmp(given.host, related.host) != 0)
    {
        related.absolute = 0;
        related.relative = 0;
        related.anchor = 0;
    }

    if (wanted & PARSE_PATH)
    {
        if (given.absolute)
        {                       /* All is given */
            if (wanted & PARSE_PUNCTUATION)
                strcat(result, "/");
            strcat(result, given.absolute);
        }
        else if (related.absolute)
        {                       /* Adopt path not name */
            char *path = result + strlen(result);   /* will hold the '/' */

            strcat(result, "/");
            strcat(result, related.absolute);
            if (given.relative)
            {
                /* Look for a search part and the last slash inside the
                   path only, so a '?' in the access or host part cannot
                   make us cut the result in front of the path. */
                p = strchr(path, '?');      /* Search part? */
                if (!p)
                    p = path + strlen(path) - 1;
                while (*p != '/')           /* last / ; path[0] is one */
                    p--;
                p[1] = 0;                   /* Remove filename */
                strcat(result, given.relative);     /* Add given one */
                HTSimplify(result);
            }
        }
        else if (given.relative)
        {
            /* The following 3 lines were copied from NCSA Mosaic for Windows */
            if ((wanted & PARSE_HOST) && (given.host || related.host)
                && (wanted & PARSE_PUNCTUATION))
                if (result[strlen(result) - 1] != '/')
                    strcat(result, "/");
            strcat(result, given.relative);     /* what we've got */
        }
        else if (related.relative)
        {
            strcat(result, related.relative);
        }
        else
        {                       /* No inheritance */
            /* Every protocol except mailto: and news: ends with a slash */
            if (strcmp(result, "mailto:") != 0 && strcmp(result, "news:") != 0)
                strcat(result, "/");
        }
    }

    if ((wanted & PARSE_ANCHOR) && (given.anchor || related.anchor))
    {
        if (wanted & PARSE_PUNCTUATION)
            strcat(result, "#");
        strcat(result, given.anchor ? given.anchor : related.anchor);
    }

    /* We truncate URLs to MAX_URL_STRING characters if they're too long. */
    result[MAX_URL_STRING] = '\0';

    return GTR_strdup(result);  /* exactly the right length */
}


/*
**  As strcpy() but guaranteed to work correctly
**  with overlapping parameters.    AL 7 Feb 1994
**
**  Does nothing if either argument is NULL.  memmove() copes with the
**  overlap directly, so no temporary buffer is needed and the copy can no
**  longer be skipped silently when an allocation fails.
*/
PRIVATE void ari_strcpy(char *to, char *from)
{
    if (!to || !from)
        return;

    memmove(to, from, strlen(from) + 1);
}


/*  Simplify a filename
**  -------------------
**
** A unix-style file is allowed to contain the sequence xxx/../ which may
** be replaced by "" , and the sequence "/./" which may be replaced by "/".
** Simplification helps us recognize duplicate filenames.
**
**  Thus,   /etc/junk/../fred   becomes /etc/fred
**          /etc/junk/./fred    becomes /etc/junk/fred
**
** A "/.." that directly follows the host part, as in
**          http://host.somewhere.com/../path
** has nothing to cancel against and is simply removed.
**
** Leading '/' and '.' characters are skipped, so
**          ../../albert.html
** is left alone.  Runs of multiple slashes are not collapsed.
**
** On entry,
**  filename    the string to simplify in place; may be NULL.
*/
PUBLIC void HTSimplify(char *filename)
{
    char *p = filename;
    char *q;

    if (!p)
        return;

    while (*p && (*p == '/' || *p == '.'))  /* Pass starting / or .'s */
        p++;

    while (*p)
    {
        if (*p == '/')
        {
            if ((p[1] == '.') && (p[2] == '.') && (p[3] == '/' || !p[3]))
            {
                /* Find the previous slash without ever looking in front
                   of the buffer: afterwards either q == filename (there is
                   no previous slash) or q[-1] is that slash. */
                q = p;
                while (q > filename && q[-1] != '/')
                    q--;

                if (q > filename)
                {
                    int after_host;

                    q--;        /* prev slash */
                    after_host = (q > filename + 1 && q[-1] == '/');

                    if (!after_host && 0 != strncmp(q, "/../", 4))
                    {
                        ari_strcpy(q, p + 3);   /* Remove /xxx/.. */
                        if (!*filename)
                            strcpy(filename, "/");
                        p = q;  /* Start again with prev slash */
                        continue;
                    }
                    else if (after_host)
                    {
                        /*
                           The so-called prev slash found is actually
                           the one before the hostname!  The URL looks
                           like this:

                           http://host.somewhere.com/../path
                                 ^                  ^
                                 |                  |
                                 q                  p

                           We now need to fix the URL to remove the ../
                         */
                        ari_strcpy(p, p + 3);
                    }
                }
            }
            else if ((p[1] == '.') && (p[2] == '/' || !p[2]))
            {
                ari_strcpy(p, p + 2);   /* Remove a slash and a dot */
                /* Look at this slash again: what slid down behind it may
                   be another "/." or a "/.." (as in "/a/./../b"). */
                continue;
            }
        }
        p++;
    }                           /* end while (*p) */
}


/* from html.c */

/*  Expand a (possibly relative) anchor against a base address
**  ----------------------------------------------------------
**
** On entry,
**  rel     the anchor as written; NULL is treated as "".
**  base    the address it is relative to; may be NULL.
** On exit,
**  returns a newly allocated string owned by the caller: an unmodified
**          copy of rel when base is NULL, otherwise the result of
**          HTParse() on the white-space-stripped rel.  NULL if the
**          working copy of rel could not be allocated.
*/
char *x_ExpandRelativeAnchor(const char *rel, const char *base)
{
    char *pTemp;
    char *stripped;
    char *result;

    if (!rel)
        rel = "";

    pTemp = GTR_strdup(rel);
    if (!pTemp)
        return NULL;            /* nothing to parse; don't hand NULL to HTParse */
    if (!base)
        return pTemp;

    stripped = HTStrip(pTemp);  /* points into pTemp */
    result = HTParse(stripped, base,
                     PARSE_PUNCTUATION | PARSE_ACCESS | PARSE_HOST
                     | PARSE_PATH | PARSE_ANCHOR);
    GTR_FREE(pTemp);
    return result;
}